Let us make a map of luminosity by focusing on Europe in 1992 and 2012
#Step1: Loadning relevant librarieslibrary(sf)library(stars)library(ggplot2)sf_use_s2(FALSE)#Step2: Read country bordersborders <-st_read(dsn="./data/boundaries/all-country-boundaries.shp", quiet =TRUE)#Step3: Simplifying border to make them easier to use on our machineborders<-st_simplify(borders, dTolerance=0.1)#Step4: Creating a folder called "F101992"target_dir <-"./data/luminosity/F101992"#Step5: Create the target directory if it doesn't existif (!dir.exists(target_dir)) {dir.create(target_dir)}#Step6: Unzip the fileuntar("./data/luminosity/F101992.v4.tar", exdir = target_dir)#Step7: Unzip the file furtherR.utils::gunzip("./data/luminosity/F101992/F101992.v4b_web.avg_vis.tif.gz", remove =FALSE, overwrite=TRUE)#Step8: Read the rasterf1992<-read_stars("./data/luminosity/F101992/F101992.v4b_web.avg_vis.tif")#Step9: Make sure the two files have the same CRSst_crs(f1992)<-st_crs(borders)#Step10: Reduce the resolution of the raster: Higher dx/day values mean lower resolutionf1992b<-st_warp(f1992, st_as_stars(st_bbox(f1992), dx =0.3, dy =0.3))#Step11: Rename the rasternames(f1992b)<-"lights_1992"#Step12:Defining the countries defining the regions of interestnorway<-subset(borders, SOVEREIGNT =="Norway")max_lat_y_eu<-st_bbox(norway)["ymax"]greece<-subset(borders, SOVEREIGNT =="Greece")min_lat_y_eu<-st_bbox(greece)["ymin"]ukraine<-subset(borders, SOVEREIGNT =="Ukraine")max_lon_x_eu<-st_bbox(ukraine)["xmax"]portugal<-subset(borders, SOVEREIGNT =="Portugal")min_lon_x_eu<-st_bbox(portugal)["xmin"]#Step13: Making the mapfig1992<-ggplot()+geom_stars(data=f1992b)+scale_fill_gradientn(name ="", colours=c("black","white"))+geom_sf(data=borders, linewidth =0.1, fill =NA, color ="white", alpha=0.5)+coord_sf(xlim =c(min_lon_x_eu-3, max_lon_x_eu+3), ylim =c(min_lat_y_eu-1, max_lat_y_eu-6))+theme_bw()+theme(panel.grid.major =element_blank(), panel.grid.minor =element_blank())
Working with Luminosity 1992
Let us make a map of luminosity by focusing on Europe in 1992 and 2012
Working with Luminosity 2012
Let us do the same for 2012
#Step1: Creating a folder called "F182012"target_dir <-"./data/luminosity/F182012"#Step2: Create the target directory if it doesn't existif (!dir.exists(target_dir)) {dir.create(target_dir)}#Step3: Unzip the fileuntar("./data/luminosity/F182012.v4.tar", exdir = target_dir)#Step4: Unzip the file furtherR.utils::gunzip("./data/luminosity/F182012/F182012.v4c_web.avg_vis.tif.gz", remove =FALSE, overwrite=TRUE)#Step5: Read the rasterf2012<-read_stars("./data/luminosity/F182012/F182012.v4c_web.avg_vis.tif")#Step6: Make sure the two files have the same CRSst_crs(f2012)<-st_crs(borders)#Step7: Reduce the resolution of the raster: Higher dx/day values mean lower resolutionf2012b<-st_warp(f2012, st_as_stars(st_bbox(f2012), dx =0.3, dy =0.3))#Step8: Rename the rasternames(f2012b)<-"lights_2012"#Step9: Making the mapfig2012<-ggplot()+geom_stars(data=f2012b)+scale_fill_gradientn(name ="",colours=c("black","white"))+geom_sf(data=borders, linewidth =0.1, fill =NA, color ="white", alpha=0.5)+coord_sf(xlim =c(min_lon_x_eu-3, max_lon_x_eu+3), ylim =c(min_lat_y_eu-1, max_lat_y_eu-6))+theme_bw()+theme(panel.grid.major =element_blank(), panel.grid.minor =element_blank())
Working with Luminosity 2012
fig2012
Working with Luminosity 2012
We can obviously put them side by side for comparison:
#Step1: Read country bordersger2 <-st_read(dsn="./data/gadm41_DEU_shp/gadm41_DEU_2.shp", quiet =TRUE)ger2<-st_simplify(ger2, dTolerance=0.005)ggplot()+geom_sf(data=ger2)
Germany
Let us also load the Germany country shape file.
#Step1: Read country bordersger0 <-st_read(dsn="./data/gadm41_DEU_shp/gadm41_DEU_0.shp", quiet =TRUE, promote_to_multi =FALSE)ger0<-st_simplify(ger0, dTolerance=0.02)ggplot()+geom_sf(data=ger0)
Germany
Let us now crop the lights based on the Germany shape.
Note that we are reloading the raster to work with the raster’s original resolution.
#Step1: Read the rasterf1992<-read_stars("./data/luminosity/F101992/F101992.v4b_web.avg_vis.tif")#Step2: Ensuring that the CRS is the samest_crs(f1992)<-st_crs(ger0)#Step3: Cropping rasterf1992_cropped<-st_crop(f1992, ger0)
Germany
Let us now compare the two
plot(f1992)
plot(f1992_cropped)
Germany
We can now provide more user-friendly variable names
Zonal statistics is set of techniques used to calculate statistics for specific zones.
The process involves overlaying a raster or vector dataset representing the zones of interest with another dataset containing the attribute values (such as elevation, temperature, population density, etc.).
Common statistics calculated in zonal statistics analysis include:
mean
median
sum
Zonal Statiscs
This is for example what mean zonal statistics means in the context of luminosity
Zonal Statiscs
This is for example what mean zonal statistics maximum in the context of luminosity
Germany
So, let us now aggregate the pixels at a county level
#Step1: Extracting only the relevant variableger2b<-subset(ger2, select =c("NAME_2", "NAME_1"))#Step2: Aggregating lights by polygonagg_1992<- raster::aggregate(f1992_cropped, ger2b, mean, na.rm=TRUE)
Germany
We now look at the output:
ggplot()+geom_stars(data=agg_1992)
Stars to Polygon
We now merge the data from the stars object back to the sf obejct
library(dplyr)#Step1: Turning the stars object into an sfagg_sf_1992<-st_as_sf(agg_1992)#Step2: Creating a numeric id based on dataframe index for the stars object turned sfagg_sf_1992$id<-as.numeric(rownames(agg_sf_1992))#Step3: Creating a numeric id based on dataframe indexger2b$id<-as.numeric(rownames(ger2b))#Step4: Dropping the geometryagg_sf_1992<-st_drop_geometry(agg_sf_1992)#Step5: Perfoming a left join by idagg_sf2_1992<-left_join(ger2b, agg_sf_1992, by =c("id"="id"))
Stars to Polygon
We then do the same for 2012
library(dplyr)#Step1: Read the rasterf2012<-read_stars("./data/luminosity/F182012/F182012.v4c_web.avg_vis.tif")#Step2: Ensuring that the CRS is the samest_crs(f2012)<-st_crs(ger0)#Step3: Cropping rasterf2012_cropped<-st_crop(f2012, ger0)#Step4: Renaming rastersnames(f2012_cropped)<-"lights_2012"#Step5: Turning the file into a stars objectf2012_cropped <- stars::st_as_stars(f2012_cropped)#Step2: Aggregating lights by polygonagg_2012<- raster::aggregate(f2012_cropped, ger2b, mean, na.rm=TRUE)#Step1: Turning the stars object into an sfagg_sf_2012<-st_as_sf(agg_2012)#Step2: Creating a numeric id based on dataframe index for the stars object turned sfagg_sf_2012$id<-as.numeric(rownames(agg_sf_2012))#Step3: Dropping geometryagg_sf_2012<-st_drop_geometry(agg_sf_2012)#Step3: Creating a numeric id based on dataframe index#ger2b$id<-as.numeric(rownames(ger2b))#Step4: Dropping the geometry#ger2c<-st_drop_geometry(ger2b)#Step5: Perfoming a left join by idagg_sf2<-left_join(agg_sf2_1992, agg_sf_2012, by =c("id"="id"))
Stars to Polygon
Let us now look at the output: the difference between starts and sf objects
Step4: Creating a 500m buffer to eliminate internal geometry errors
#Step5: Changing the coordinate system for Germany CRSeast_ger_diss2 =st_transform(east_ger_diss, 6875)#Step6: Creating a 500m buffer to eliminate internal geometry errorseast_ger_diss3 <-st_buffer(east_ger_diss2, dist =500)#Step7: Turning back to WGS84east_ger_diss4<-st_transform(st_as_sf(east_ger_diss3), st_crs(east_ger))
Step5: Create county centroids to identify their location in East or in the West
We then create the polygon centroid:
##Marking countries in the East vs. not vased on spatial location#Step8: Create centroidger2_centr<-st_centroid(agg_sf2)
Here is what the difference look like:
Show the code
ggplot()+geom_sf(data=ger2b)
Show the code
ggplot()+geom_sf(data=ger2_centr)
Step6: Perfoming the spatial join
We then add a column to indicate East and West.
#Step9: Adding a column to the polygon whose attribute we wanteast_ger_diss4$region<-"East"#Step10: Perfoming the spatial joinger_centr_joined<-st_join(ger2_centr, east_ger_diss4)#Step11: Replace NA with "West"ger_centr_joined$region[is.na(ger_centr_joined$region)]<-"West"
Here is what the points look like after creating that column.
Step7: Merging the spatial attribute back to polygons
We then merge the region column from the points to the polygon.
#Step12: Dropping Geometryger_centr_joined_df<-st_drop_geometry(ger_centr_joined)#Step13: Keeping only relevant variable resulting from the spatial mergeger_centr_joined_df2<-subset(ger_centr_joined_df, select=c("NAME_2", "region"))#Step14: Merging it back to the polygonger2c<-left_join(agg_sf2, ger_centr_joined_df2, by =c("NAME_2"="NAME_2"))
Step 3: Making neg. buffer to make the polygon size smaller so that we crop the internal eastern line
Step 4: Intersecting line with the smaller Germany polygon
Step 5: Calculating distance to line
Step 1: Turning the multipolygon to POLYGON
We need to first turn the multipolygon to a polygon
#Step1: Turning the multipolygon to POLYGONeast_ger_pol<-st_cast(east_ger_diss4, "POLYGON")
Let us examime the difference.
head(east_ger_pol)
Simple feature collection with 3 features and 1 field
Geometry type: POLYGON
Dimension: XY
Bounding box: xmin: 9.871475 ymin: 50.16888 xmax: 15.04897 ymax: 54.68908
Geodetic CRS: WGS 84
region x
1 East POLYGON ((10.02385 50.84353...
1.1 East POLYGON ((13.77548 54.1987,...
1.2 East POLYGON ((13.52367 54.32939...
head(east_ger_diss4)
Simple feature collection with 1 feature and 1 field
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 9.871475 ymin: 50.16888 xmax: 15.04897 ymax: 54.68908
Geodetic CRS: WGS 84
x region
1 MULTIPOLYGON (((10.02385 50... East
Step 2: Turning the polygong to line
Let us turn the polygon into line
east_ger_line =st_cast(east_ger_pol,"LINESTRING")
This is what the output looks like:
ggplot()+geom_sf(data=east_ger_pol)
ggplot()+geom_sf(data=east_ger_line)
Step 3: Making neg. buffer to make the polygon size smaller so that we crop the internal eastern line
We now create an inside buffer
ger0b<-st_buffer(ger0, dist=-0.03)
Step 3: Making neg. buffer to make the polygon size smaller so that we crop the internal eastern line
Step 4: Intersecting line with the smaller Germany polygon
Here is what the attributes of the output look like:
head(intersection_lp)
Simple feature collection with 2 features and 3 fields
Geometry type: GEOMETRY
Dimension: XY
Bounding box: xmin: 9.871475 ymin: 50.20032 xmax: 12.07138 ymax: 53.92832
Geodetic CRS: WGS 84
region GID_0 COUNTRY x
1 East DEU Germany MULTILINESTRING ((10.02385 ...
1.24 East DEU Germany LINESTRING (10.91872 53.916...
Step 4: Intersecting line with the smaller Germany polygon
It seems that we have two lines:
line1<-head(intersection_lp, n=1)
line2<-tail(intersection_lp, n=1)
Here is what they look like:
ggplot()+geom_sf(data=line1, color="red")
ggplot()+geom_sf(data=line2, color="blue")
Step 4: Intersecting line with the smaller Germany polygon
Welch Two Sample t-test
data: ger2d_east$lights_1992 and ger2d_west$lights_1992
t = -4.4277, df = 145.75, p-value = 1.857e-05
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-9.042632 -3.461273
sample estimates:
mean of x mean of y
13.25170 19.50365
In this case, we reject the null hypothesis (H0) because p < 0.05 and conclude that there is a significant difference between the means of the two groups.
Comparison 2012
What about 2012?
Show the code
library(classInt)# Desaturation libarieslibrary(colorspace)library(colorblindr)library(gridExtra)clint <-classIntervals(ger2d$lights_2012, style ="box")$brksdesaturation <-1# Get the original Viridis paletteoriginal_palette <-viridis_pal()(100)desaturated_palette <-desaturate(original_palette, desaturation)# Plot with desaturated Viridis color scalepicx1_des<-ggplot() +geom_sf(data = ger2d, aes(fill = lights_2012), color ="white", linewidth =0.01) +scale_fill_gradientn(colors = desaturated_palette, breaks =round(clint,2)) +guides(fill =guide_coloursteps(even.steps =FALSE,show.limits =FALSE,title =NULL,barheight =unit(0.9, "npc"), # Relative to plot heightbarwidth =unit(0.05, "npc"), # Relative to plot widthlabel.position ="left"))+theme_bw()+geom_sf(data = east_ger_diss4, color="red", linewidth=1, fill=NA)picx2<-ggplot(ger2d, aes(x = region, y = lights_2012, color=region)) +geom_boxplot()+scale_y_continuous(breaks = clint,labels =round(clint,2)) +ylab(NULL) +xlab(NULL) +theme(axis.text =element_text(colour ="grey"),panel.background =element_blank(),axis.text.x =element_blank(),axis.ticks.x =element_blank())grid.arrange(picx1_des, picx2, ncol=2)
Comparison 2012
But are these differences statistically significant?
Welch Two Sample t-test
data: ger2d_east$lights_2012 and ger2d_west$lights_2012
t = -3.6888, df = 134.52, p-value = 0.0003261
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-9.324930 -2.815704
sample estimates:
mean of x mean of y
19.06810 25.13842
In this case, we reject the null hypothesis (H0) because p < 0.05 and conclude that there is a significant difference between the means of the two groups.
Observations
Thus, even after 20 years (1992-2012), there is still a lingering difference in development between East and West.
What if we were interested in changes in lumnosity?
For example, what if we wanted to examine which are the countries which experiences the most vs. least growth?
We can calculate the difference between 2012 and 1992
a large difference means that countries developed substantially
a small difference means that countries did not develop as much (or maybe even shrunk)
Difference
Let’s calculate the difference between 2012 and 1992.
Welch Two Sample t-test
data: ger2d_east$diff and ger2d_west$diff
t = 0.48112, df = 115.94, p-value = 0.6313
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.5661080 0.9293796
sample estimates:
mean of x mean of y
5.816408 5.634772
In this case, we do not reject the null hypothesis (H0) because p ≥ 0.05 and conclude that there is no significant difference between the means of the two groups.
Refining the Analysis
One criticism to our analysis is that we are comparing units that are too different.
For example, it feels strange to compare units like Lörrach or Uckermark.
Welch Two Sample t-test
data: ger2d_east$lights_1992 and ger2d_west$lights_1992
t = -1.907, df = 125.23, p-value = 0.05881
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-7.2594941 0.1346275
sample estimates:
mean of x mean of y
12.77668 16.33911
In this case, we do not reject the null hypothesis (H0) because p ≥ 0.05 and conclude that there is no significant difference between the means of the two groups.
Comparison 2012
We can also perform the analysis only based on the 100km sample around the border for 2012
Show the code
library(classInt)# Desaturation libarieslibrary(colorspace)library(colorblindr)library(gridExtra)clint <-classIntervals(ger_100km$lights_2012, style ="box")$brksdesaturation <-1# Get the original Viridis paletteoriginal_palette <-viridis_pal()(100)desaturated_palette <-desaturate(original_palette, desaturation)# Plot with desaturated Viridis color scalepicx1_des<-ggplot() +geom_sf(data = ger_100km, aes(fill = lights_2012), color ="white", linewidth =0.01) +scale_fill_gradientn(colors = desaturated_palette, breaks = clint) +guides(fill =guide_coloursteps(even.steps =FALSE,show.limits =FALSE,title =NULL,barheight =unit(0.9, "npc"), # Relative to plot heightbarwidth =unit(0.05, "npc"), # Relative to plot widthlabel.position ="left"))+theme_bw()+geom_sf(data = line1, color="red", linewidth=2)picx2<-ggplot(ger_100km, aes(x = region, y = lights_2012, color=region)) +geom_boxplot()+scale_y_continuous(breaks = clint,labels =round(clint,2)) +ylab(NULL) +xlab(NULL) +theme(axis.text =element_text(colour ="grey"),panel.background =element_blank(),axis.text.x =element_blank(),axis.ticks.x =element_blank())grid.arrange(picx1_des, picx2, ncol=2)
Comparison 2012
But are these differences statistically significant?
Welch Two Sample t-test
data: ger2d_east$lights_2012 and ger2d_west$lights_2012
t = -1.0559, df = 116.9, p-value = 0.2932
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-6.888198 2.097555
sample estimates:
mean of x mean of y
18.77059 21.16591
In this case, we do not reject the null hypothesis (H0) because p ≥ 0.05 and conclude that there is no significant difference between the means of the two groups.
Comparison Difference
Show the code
library(classInt)# Desaturation libarieslibrary(colorspace)library(colorblindr)library(gridExtra)ger_100km$diff<-ger_100km$lights_2012-ger_100km$lights_1992clint <-classIntervals(ger_100km$diff, style ="box")$brksdesaturation <-0.2# Get the original Viridis paletteoriginal_palette <-viridis_pal()(100)desaturated_palette <-desaturate(original_palette, desaturation)# Plot with desaturated Viridis color scalepicx1_des<-ggplot() +geom_sf(data = ger_100km, aes(fill = diff), color ="white", linewidth =0.01) +scale_fill_viridis_c(option ="turbo", # You can choose other options like "A", "B", or "D"#direction = -1, # Use -1 for reverse color orderbreaks = clint,guide =guide_coloursteps(even.steps =FALSE,show.limits =FALSE,title =NULL,barheight =unit(0.9, "npc"), # Relative to plot heightbarwidth =unit(0.05, "npc"), # Relative to plot widthlabel.position ="left"))+theme_bw()+geom_sf(data = line1, color="red", linewidth=2)picx2<-ggplot(ger_100km, aes(x = region, y = diff, color=region)) +geom_boxplot()+scale_y_continuous(breaks = clint,labels =round(clint,2)) +ylab(NULL) +xlab(NULL) +theme(axis.text =element_text(colour ="grey"),panel.background =element_blank(),axis.text.x =element_blank(),axis.ticks.x =element_blank())grid.arrange(picx1_des, picx2, ncol=2)
Difference
Is the difference (2012-1992) statistically significant?
Welch Two Sample t-test
data: ger2d_east$diff and ger2d_west$diff
t = 2.1301, df = 106.88, p-value = 0.03545
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
0.08094403 2.25328005
sample estimates:
mean of x mean of y
5.993908 4.826796
In this case, we reject the null hypothesis (H0) because p < 0.05 and conclude that there is a significant difference between the means of the two groups.
Observations about the Difference
When using the entire sample, there was a difference in luminosity in 1992 between the East and the West
When using the 100-km sample, there was no difference in luminosity in 1992 between the East and the West
When using the entire sample, there was a difference in luminosity in 2012 between the East and the West
When using the 100-km sample, there was no difference in luminosity in 2012 between the East and the West
When using the entire sample, there was no difference in change (2012-1992) between the East and the West
When using the 100-km sample, there was a difference in change (2012-1992) between the East and the West
Conclusion
We covered ways to work with rasters.
cropping raster to a polygon
zonal statistics: mean
converting rasters (stars) to polygons
subtracting rasters
We also reviewed/learned new ways to work with vectors
Dissolving polygons
Creating buffers to eliminate internal geometry errors